This Rmarkdown file shows the code we used to analyze demographic change over time in and outside of historic districts.
knitr::opts_chunk$set(echo = TRUE, message=F, warning=F, fig.width = 11.5, fig.height = 6.5)
library(readr)
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
library(leaflet)
library(rstudioapi)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(patchwork)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Set working directory to the place this script is saved:
# Getting the path of your current open file
current_path = rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
Load our data. We’ll first do the analysis using Census tracts, and then later use Census blocks.
# load data that has the dates the historic districts were designated
# comes from here: https://planning.dc.gov/page/dc-historic-districts
# hd_data <- readr::read_csv("https://docs.google.com/spreadsheets/d/1Ajl1iAS0NRB7vk_UFDveeWzGkwf3tuiDo-zV9_wtzRM/gviz/tq?tqx=out:csv&sheet=data")
hd_data <- readr::read_csv("hd_data/data.csv")
# load the historic district boundary shape files
# comes from here: https://opendata.dc.gov/datasets/DCGIS::historic-districts/about
hd_shp <- sf::st_read("Historic_Districts/Historic_Districts.shp")
## Reading layer `Historic_Districts' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\Historic_Districts\Historic_Districts.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 73 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -8584936 ymin: 4696608 xmax: -8564736 ymax: 4720410
## Projected CRS: WGS 84 / Pseudo-Mercator
# load the 2022 ward shape files
# comes from here: https://opendata.dc.gov/datasets/DCGIS::wards-from-2022/about
ward_shp <- sf::st_read("Wards_from_2022/Wards_from_2022.shp")
## Reading layer `Wards_from_2022' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\Wards_from_2022\Wards_from_2022.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 20 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -8584936 ymin: 4691870 xmax: -8561487 ymax: 4721094
## Projected CRS: WGS 84 / Pseudo-Mercator
# load the zoning map:
# comes from here: https://opendata.dc.gov/datasets/DCGIS::zoning-boundaries-zoning-regulations-of-2016/about
zone_shp <- sf::st_read("zoning/Zoning_Boundaries_(Zoning_Regulations_of_2016).shp")
## Reading layer `Zoning_Boundaries_(Zoning_Regulations_of_2016)' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\zoning\Zoning_Boundaries_(Zoning_Regulations_of_2016).shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 953 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -8584936 ymin: 4691870 xmax: -8561487 ymax: 4721094
## Projected CRS: WGS 84 / Pseudo-Mercator
# load & clean census tract data
# please see https://opendata.dc.gov/datasets/DCGIS::census-tracts-in-1970/about
# for example, for more details on variable names
load_clean_tracts <- function(geo_id_var, black_var, white_var, totpop_var, year) {
# Loads the shapefile, removes unneeded columns, calculates the tract area in meters^2
shp <- sf::st_read(paste0("tract_data/Census_Tracts_in_", year,
"/Census_Tracts_in_", year, ".shp"))
shp <- shp %>%
rename("geo_id" = !!sym(geo_id_var),
"n_black" = !!sym(black_var),
"n_white" = !!sym(white_var),
"n_tot" = !!sym(totpop_var)
) %>%
select("geo_id", starts_with("n_")) %>%
mutate(n_other = n_tot - (n_black + n_white),
year = year)
shp$geo_area_meters <- sf::st_area(shp)
shp <- sf::st_transform(shp, 4326)
return(shp %>% select("year", "geo_id", "n_tot", "n_black", "n_white", "n_other", "geo_area_meters", "geometry"))
}
t60_shp <- load_clean_tracts("GISJOIN", "B58013", "B58011", "CA4001", 1960)
t70_shp <- load_clean_tracts("GISJOIN", "CEB03", "CEB01", "CY7001", 1970)
t80_shp <- load_clean_tracts("GISJOIN", "C9D003", "C9D001", "C7L001", 1980)
t90_shp <- load_clean_tracts("TRACTNO", "BLACK", "WHITE", "POPULATION", 1990)
t00_shp <- load_clean_tracts("TRACTNO", "BLACK", "WHITE", "TOTAL", 2000)
t10_shp <- load_clean_tracts("TRACT", "P0010004", "P0010003", "P0010001", 2010)
t20_shp <- load_clean_tracts("TRACT", "P0010004", "P0010003", "P0010001", 2020)
gc()
Merge historic distrit (HD) data onto HD shapefile, subset to only look at neighborhood HDs:
hd_shp <- dplyr::left_join(x = hd_shp, y = hd_data, by = "UNIQUEID")
hd_shp <- hd_shp[hd_shp$Neighborhood_HD==1,]
Transform shape files to mercator projection:
# convert to mercator projection
zone_shp <- sf::st_transform(zone_shp, 4326)
hd_shp <- sf::st_transform(hd_shp, 4326)
ward_shp <- sf::st_transform(ward_shp, 4326)
Let’s overlay HDs onto DC’s zoning map.
# list zones:
zones_list <- sort(unique(zone_shp$ZR16))
housing_zones <- zones_list[grep(x=zones_list, pattern = "^R|^MU")]
# subset
zone_shp <- zone_shp[zone_shp$ZR16 %in% housing_zones,]
# create simplified labels
zone_shp$ZR16_simple <- "Other"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^RA-")] <- "Apartment zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^R-")] <- "Residential zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^RF-")] <- "Residential flat zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^MU-")] <- "Mixed use zones"
# show on a map:
factpal <- colorFactor(palette = "Set1", domain = zone_shp$ZR16_simple)
leaflet(zone_shp) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~factpal(ZR16_simple), # Apply the color function
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~ZR16,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = TRUE
)
) %>%
addLegend(pal = factpal, values = ~ZR16_simple, opacity = 0.7, title = NULL,
position = "bottomright")
Calculate the total amount of residential and MU land in DC:
# need to fix broken geometries:
fix_geo_if_broken <- function(shp) {
if (min(sf::st_is_valid(shp)) == 0) {
print("Fixing geometry...")
return(sf::st_make_valid(shp))
} else {
return(shp)
}
}
zone_shp <- fix_geo_if_broken(zone_shp)
## [1] "Fixing geometry..."
hd_shp <- fix_geo_if_broken(hd_shp)
ward_shp <- fix_geo_if_broken(ward_shp)
t60_shp <- fix_geo_if_broken(t60_shp)
t70_shp <- fix_geo_if_broken(t70_shp)
t80_shp <- fix_geo_if_broken(t80_shp)
t90_shp <- fix_geo_if_broken(t90_shp)
t00_shp <- fix_geo_if_broken(t00_shp)
t10_shp <- fix_geo_if_broken(t10_shp)
t20_shp <- fix_geo_if_broken(t20_shp)
zone_shp$area_meters <- sf::st_area(zone_shp)
zone_shp$area_acres <- as.vector(zone_shp$area_meters * 0.000247105)
total_zone_acres <- sum(zone_shp$area_acres, na.rm=T)
Total and % of land area covered by HDs over time:
# get shape areas:
hd_shp$area_meters <- sf::st_area(hd_shp)
hd_shp$area_acres <- as.vector(hd_shp$area_meters * 0.000247105)
years <- c(1960, 1970, 1980, 1990, 2000, 2010, 2025)
land_areas <- rep(NA, length(years))
counter <- 1
for (year in years) { # Land area covered by HDs by year:
land_area <- sum(hd_shp$area_acres[hd_shp$desig_date < year])
land_areas[counter] <- land_area
counter <- counter + 1
}
p <- data.frame(years, land_areas, round(100*land_areas / total_zone_acres,0))
names(p) <- c("Year",
"Area covered by by Historic Districts, in Acres",
"Percent of 2016 residential zone covered by Neighborhood HD")
plot1 <-
ggplot(p,
aes(x=Year,
y=`Area covered by by Historic Districts, in Acres`)) +
geom_bar(stat = "identity", fill="#0f9535") +
geom_text(aes(label = round(`Area covered by by Historic Districts, in Acres`, 0), vjust = -1.7)) +
ylab("Acres") +
theme_minimal() +
ggtitle('Acres covered by "neighborhood historic districts"\nhas steadily increased over time')
plot2 <-
ggplot(p,
aes(x=Year,
y=`Percent of 2016 residential zone covered by Neighborhood HD`)) +
geom_line(color="#0f9535", size=.75) +
geom_point(color="#0f9535") +
geom_text(aes(label = paste0(`Percent of 2016 residential zone covered by Neighborhood HD`, "%")), vjust = -1.7) +
theme_minimal() +
ylab("Percent") +
ggtitle('And the % of residential area covered\nby HDs has more than doubled since 1980')
plot1 + plot2
Let’s quickly compare which HDs were designated before vs after 1980:
hd_shp$flag_1980 <- 0
hd_shp$flag_1980[hd_shp$desig_date < 1980] <- 1
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(group= "Designated before 1980",
data=hd_shp[hd_shp$flag_1980==1,],
fillColor = "skyblue",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~LABEL,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addPolygons(group= "Designated after 1980",
data=hd_shp[hd_shp$flag_1980==0,],
fillColor = "hotpink",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~LABEL,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addLayersControl(
overlayGroups = c("Designated before 1980", "Designated after 1980"),
options = layersControlOptions(collapsed = FALSE)
)
We’ll remove some very small HDs for the next part (like HDs that are a single circle):
# Drop some very small HDs that are like a single circle
# We just dropped anything smaller than 10 acres
hd_shp <-
hd_shp %>%
filter(!(LABEL %in% c("Emerald St HD",
"Grant Rd HD",
"Mount Vernon Triangle HD",
"Grant Circle HD",
"Union Market"
)))
# turn off spherical geometry (s2)
sf_use_s2(FALSE)
Now let’s look and see how HDs changed over time, in terms of % black residents and % white residents, compared to nearby neighborhoods (tracts) that were not in HDs.
This will have a few steps:
Function to find which tracts/blocks are in which HDs:
get_geos_in_hd <- function(shp, min_pct, year) {
# This function gets the tracts that are in each HD in a given year
# shp: the tract or block shapefile (an sf shapefile object)
# min_pct: the minimum % of the tract or block that must be in the HD to count as part of the HD (a decimal # between 0 and 1)
# year: the year (an integer like 1980)
i <- sf::st_intersection(x=shp, y=hd_shp)
i$i_area <- sf::st_area(i)
i$pct_of_geo_area <- as.vector(i$i_area / i$geo_area_meters)
geos_in_hd <- i[i$pct_of_geo_area > min_pct,
c("year", "geo_id", "LABEL",
"n_tot", "n_black", "n_white", "n_other",
"desig_date", "pct_of_geo_area")]
geos_in_hd_summary <-
geos_in_hd %>%
mutate(n_tot_prorated = n_tot * pct_of_geo_area,
n_black_prorated = n_black * pct_of_geo_area,
n_white_prorated = n_white * pct_of_geo_area,
n_other_prorated = n_other * pct_of_geo_area) %>%
select("year", "LABEL", "geo_id", starts_with("n_"), "desig_date") %>%
group_by(LABEL) %>%
summarise(n_tot = sum(n_tot, na.rm=T),
n_black = sum(n_black, na.rm=T),
n_white = sum(n_white, na.rm=T),
n_other = sum(n_other, na.rm=T),
n_tot_prorated = sum(n_tot_prorated, na.rm=T),
n_black_prorated = sum(n_black_prorated, na.rm=T),
n_white_prorated = sum(n_white_prorated, na.rm=T),
n_other_prorated = sum(n_other_prorated, na.rm=T),
desig_year = max(desig_date, na.rm=T)) %>%
mutate(desig_yet = ifelse(desig_year<year, 1, 0),
year = year)
rv = list("geos_in_hd"=geos_in_hd, "summary"=sf::st_drop_geometry(geos_in_hd_summary))
return(rv)
}
Function to find which tracts/blocks are nearby but not inside the HDs:
get_neighbor_geos <- function(hd_shp, geo_shp, buffer_dist, geos_in_hd, remove_geo_thresh, year) {
# first make a buffer around the HDs
b <- sf::st_buffer(hd_shp, dist = buffer_dist)
# then get the intersection of the buffer and the tracts
i <- sf::st_intersection(x=geo_shp, y=b)
# remove tracts that have already been classified as within an HD
i <- i[!(i$geo_id %in% geos_in_hd$geos_in_hd$geo_id),]
# also remove any tracts/blocks for which more than X% of an HD is in that tract/block
hd_shp$area_meters <- sf::st_area(hd_shp)
hd_geo <- sf::st_intersection(x=geo_shp, y=hd_shp)
hd_geo$intersect_area <- sf::st_area(hd_geo)
hd_geo$pct_area <- as.vector(hd_geo$intersect_area / hd_geo$area_meters)
geos_to_remove <- hd_geo$geo_id[hd_geo$pct_area > remove_geo_thresh]
neighboring_geos <- i[!(i$geo_id %in% geos_to_remove),]
# finally, remove
neighboring_geos <- geo_shp[geo_shp$geo_id %in% neighboring_geos$geo_id,]
neighboring_geos <- dplyr::left_join(neighboring_geos,
sf::st_drop_geometry(i[,c("geo_id", "LABEL", "desig_date")]),
by="geo_id")
neighbor_geos_summary <-
sf::st_drop_geometry(neighboring_geos) %>%
group_by(LABEL) %>%
summarise(n_tot = sum(n_tot, na.rm=T),
n_black = sum(n_black, na.rm=T),
n_white = sum(n_white, na.rm=T),
n_other = sum(n_other, na.rm=T),
desig_year = max(desig_date, na.rm=T)) %>%
mutate(desig_yet = ifelse(desig_year<year, 1, 0),
year = year)
rv = list("buffers" = b,
"neighbor_geos" = neighboring_geos,
"neighbor_geos_summary" = neighbor_geos_summary)
return(rv)
}
Function to map those tracts/blocks and see if everything looks good:
plot_geos <- function(geo_shp, nearby_geos_shp, geos_in_hds) {
rv <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(group='buffers', data=nearby_geos_shp[["buffers"]]) %>%
addPolygons(group= "geos labeled as near HDs",
data=nearby_geos_shp[["neighbor_geos"]],
fillColor = "skyblue",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~paste0("Geo: ", geo_id, "; HD neighbor: ", LABEL),
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addPolygons(group= "geos labeled as in HDs",
data=geo_shp[geo_shp$geo_id %in% geos_in_hds, ],
fillColor = "limegreen",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~geo_id,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addPolygons(group= "HDs",
data=hd_shp,
fillColor = "hotpink",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~LABEL,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addLayersControl(
overlayGroups = c("geos labeled as near HDs", "geos labeled as in HDs", "HDs", "buffers"),
options = layersControlOptions(collapsed = FALSE)
)
return(rv)
}
Function to run regressions:
run_regressions <- function(hd_comp_df, near_comp_df, weights=F) {
hd_comp_df <-
hd_comp_df %>%
select(-ends_with("_prorated"), -desig_year) %>%
group_by(LABEL, desig_yet, year) %>%
summarise_all(., sum) %>%
mutate(pct_black = n_black / n_tot,
pct_white = n_white / n_tot) %>%
rename_with(~ paste0(., "_hd"))
near_comp_df <-
near_comp_df %>%
select(-desig_year) %>%
group_by(LABEL, desig_yet, year) %>%
mutate(pct_black = n_black / n_tot,
pct_white = n_white / n_tot) %>%
summarise_all(., sum) %>%
rename_with(~ paste0(., "_near"))
comp_df <- dplyr::full_join(x = hd_comp_df,
y = near_comp_df,
by = c("LABEL_hd"="LABEL_near",
"desig_yet_hd"="desig_yet_near",
"year_hd"="year_near"))
comp_df_copy <- comp_df
comp_df <- comp_df %>% select(starts_with(c("LABEL", "desig_yet", "year", "pct_")))
comp_df <-
comp_df %>%
tidyr::pivot_longer(
cols = starts_with("pct_"),
names_to = c("group", "treatment_control"),
names_pattern = "pct_([a-zA-Z]+)_([a-zA-Z]+)",
values_to = "percent")
comp_df <- dplyr::left_join(x = comp_df,
y = comp_df_copy %>% select("LABEL_hd", "desig_yet_hd", "year_hd", "n_tot_hd"),
by = c("LABEL_hd",
"desig_yet_hd",
"year_hd"))
# remove HDs that are always HDs or always not HDs during the timeframe
comp_df <-
comp_df %>%
group_by(LABEL_hd) %>%
mutate(mean_status = mean(desig_yet_hd, na.rm=T)) %>%
filter(mean_status > 0) %>%
filter(mean_status < 1)
comp_df$treated <- 0
comp_df$treated[comp_df$treatment_control=="hd"] <- 1
comp_df$LABEL_hd <- as.factor(comp_df$LABEL_hd)
comp_df$year_hd <- as.factor(comp_df$year_hd)
if (weights) {
# diff in diff analysis for change in the % of black residents
diff_in_diff_b <- lm(percent ~ # outcome: % of white or black residents
treated + # "treatment": whether tract is in a HD or not
desig_yet_hd + # pre/post indicator: whether HD was designated yet
treated:desig_yet_hd + # D-in-D estimator: effect of treatment after implemented
LABEL_hd + # fixed effect for HD area
year_hd, # fixed effect for year
data = comp_df[comp_df$group=="black",],
weights=n_tot_hd)
# diff in diff analysis for change in the % of white residents
diff_in_diff_w <- lm(percent ~ # outcome: % of white or black residents
treated + # "treatment": whether tract is in a HD or not
desig_yet_hd + # pre/post indicator: whether HD was designated yet
treated:desig_yet_hd + # D-in-D estimator: effect of treatment after implemented
LABEL_hd + # fixed effect for HD area
year_hd, # fixed effect for year
data = comp_df[comp_df$group=="white",],
weights=n_tot_hd)
text_bit <- ", weighted by HD population"
} else {
# diff in diff analysis for change in the % of black residents
diff_in_diff_b <- lm(percent ~ # outcome: % of white or black residents
treated + # "treatment": whether tract is in a HD or not
desig_yet_hd + # pre/post indicator: whether HD was designated yet
treated:desig_yet_hd + # D-in-D estimator: effect of treatment after implemented
LABEL_hd + # fixed effect for HD area
year_hd, # fixed effect for year
data = comp_df[comp_df$group=="black",])
# diff in diff analysis for change in the % of white residents
diff_in_diff_w <- lm(percent ~ # outcome: % of white or black residents
treated + # "treatment": whether tract is in a HD or not
desig_yet_hd + # pre/post indicator: whether HD was designated yet
treated:desig_yet_hd + # D-in-D estimator: effect of treatment after implemented
LABEL_hd + # fixed effect for HD area
year_hd, # fixed effect for year
data = comp_df[comp_df$group=="white",])
text_bit <- ", NOT weighted by HD population"
}
print("__________________________________")
print(paste0("D-in-D regression for the % of black residents", text_bit))
print(summary(diff_in_diff_b))
print("__________________________________")
print(paste0("D-in-D regression for the % of white residents", text_bit))
print(summary(diff_in_diff_w))
comp_df <-
comp_df %>%
group_by(LABEL_hd, desig_yet_hd) %>%
mutate(first_decade_desig=min(as.numeric(as.character(year_hd)))) %>%
mutate(first_decade_desig=ifelse(desig_yet_hd==0, 0, first_decade_desig)) %>%
ungroup() %>%
group_by(LABEL_hd) %>%
mutate(first_decade_desig=max(first_decade_desig)) %>%
mutate(year_index = as.numeric(as.character(year_hd)) - first_decade_desig)
comp_df <-
comp_df %>%
ungroup() %>%
group_by(LABEL_hd, group, treatment_control) %>%
mutate(percent_std = (percent - mean(percent, na.rm=T)) / sd(percent, na.rm=T))
return(comp_df)
}
Call all the functions we created above:
# ranging the min % between .2 and .6 seems to give reasonable results
mp = 0.25
hd_geos60 <- get_geos_in_hd(t60_shp, min_pct = mp, year = 1960)
hd_geos70 <- get_geos_in_hd(t70_shp, min_pct = mp, year = 1970)
hd_geos80 <- get_geos_in_hd(t80_shp, min_pct = mp, year = 1980)
hd_geos90 <- get_geos_in_hd(t90_shp, min_pct = mp, year = 1990)
hd_geos00 <- get_geos_in_hd(t00_shp, min_pct = mp, year = 2000)
hd_geos10 <- get_geos_in_hd(t10_shp, min_pct = mp, year = 2010)
hd_geos20 <- get_geos_in_hd(t20_shp, min_pct = mp, year = 2020)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1474957 78.8 2423541 129.5 2423541 129.5
## Vcells 4400916 33.6 10146329 77.5 10146324 77.5
# the buffer distance is in decimal degrees
buff_dist = .005
threshold = .1
nearby_tracts60 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t60_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1960)
nearby_tracts70 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t70_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1970)
nearby_tracts80 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t80_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos80,threshold, 1980)
nearby_tracts90 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t90_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos90,threshold, 1990)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1474737 78.8 2423541 129.5 2423541 129.5
## Vcells 4476328 34.2 10146329 77.5 10146324 77.5
nearby_tracts00 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t00_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos00,threshold, 2000)
nearby_tracts10 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t10_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos10,threshold, 2010)
nearby_tracts20 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t20_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos20,threshold, 2020)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1475871 78.9 2423541 129.5 2423541 129.5
## Vcells 4534097 34.6 10146329 77.5 10146324 77.5
Plot our classifications in 1960 and 2020, as a gut check:
plot_geos(t60_shp, nearby_tracts60, hd_geos60$geos_in_hd$geo_id)
plot_geos(t20_shp, nearby_tracts20, hd_geos20$geos_in_hd$geo_id)
Now compare the demographics of the HD tracts and their neighbors in each year:
hd_comp_df <- dplyr::bind_rows(hd_geos60[[2]], hd_geos70[[2]], hd_geos80[[2]], hd_geos90[[2]],
hd_geos00[[2]], hd_geos10[[2]], hd_geos20[[2]],)
near_comp_df <- dplyr::bind_rows(nearby_tracts60[[3]], nearby_tracts70[[3]],nearby_tracts80[[3]], nearby_tracts90[[3]],
nearby_tracts00[[3]], nearby_tracts10[[3]], nearby_tracts20[[3]],)
comp_df <- run_regressions(hd_comp_df, near_comp_df)
## [1] "__________________________________"
## [1] "D-in-D regression for the % of black residents, NOT weighted by HD population"
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## LABEL_hd + year_hd, data = comp_df[comp_df$group == "black",
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51441 -0.08561 0.00642 0.10008 0.32372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.982139 0.050620 19.402 < 2e-16 ***
## treated -0.032361 0.028279 -1.144 0.253546
## desig_yet_hd -0.045030 0.034295 -1.313 0.190355
## LABEL_hdBlagden Alley/Naylor Court HD -0.179310 0.071455 -2.509 0.012712 *
## LABEL_hdBloomingdale HD -0.168882 0.061650 -2.739 0.006589 **
## LABEL_hdCapitol Hill HD -0.346544 0.058854 -5.888 1.22e-08 ***
## LABEL_hdCleveland Park HD -0.820924 0.059037 -13.905 < 2e-16 ***
## LABEL_hdDowntown HD -0.247801 0.072154 -3.434 0.000693 ***
## LABEL_hdDupont Circle HD -0.763644 0.059926 -12.743 < 2e-16 ***
## LABEL_hdFinancial HD -0.613816 0.154058 -3.984 8.83e-05 ***
## LABEL_hdFoggy Bottom HD -0.886229 0.074670 -11.869 < 2e-16 ***
## LABEL_hdFoxhall HD -0.933840 0.072154 -12.942 < 2e-16 ***
## LABEL_hdGeorgetown HD -0.842525 0.059026 -14.274 < 2e-16 ***
## LABEL_hdGreater 14th St HD -0.443055 0.072443 -6.116 3.57e-09 ***
## LABEL_hdGreater U St HD -0.239301 0.059571 -4.017 7.75e-05 ***
## LABEL_hdGWU / Old West End HD -0.900888 0.076070 -11.843 < 2e-16 ***
## LABEL_hdKalorama Triangle HD -0.692670 0.059037 -11.733 < 2e-16 ***
## LABEL_hdKingman Park HD -0.052059 0.062752 -0.830 0.407540
## LABEL_hdLafayette Square HD -0.911952 0.079354 -11.492 < 2e-16 ***
## LABEL_hdLeDroit Park HD -0.152142 0.071058 -2.141 0.033210 *
## LABEL_hdLogan Circle HD -0.209050 0.074684 -2.799 0.005514 **
## LABEL_hdMassachusetts Ave HD -0.881518 0.074619 -11.814 < 2e-16 ***
## LABEL_hdMeridian Hill -0.408705 0.070015 -5.837 1.60e-08 ***
## LABEL_hdMt. Pleasant HD -0.491427 0.059037 -8.324 5.14e-15 ***
## LABEL_hdMt. Vernon Square HD -0.136578 0.060485 -2.258 0.024786 *
## LABEL_hdPennsylvania Ave NHS -0.426778 0.066439 -6.424 6.44e-10 ***
## LABEL_hdShaw HD -0.196403 0.059571 -3.297 0.001116 **
## LABEL_hdSheridan-Kalorama HD -0.776764 0.059037 -13.157 < 2e-16 ***
## LABEL_hdSixteenth St HD -0.250765 0.071058 -3.529 0.000494 ***
## LABEL_hdStrivers' Section HD -0.513097 0.075310 -6.813 6.80e-11 ***
## LABEL_hdTakoma Park HD -0.281888 0.071088 -3.965 9.52e-05 ***
## LABEL_hdWashington Heights HD -0.693408 0.063764 -10.875 < 2e-16 ***
## LABEL_hdWoodley Park HD -0.807681 0.071455 -11.303 < 2e-16 ***
## year_hd1970 0.079270 0.032357 2.450 0.014960 *
## year_hd1980 0.086808 0.033812 2.567 0.010816 *
## year_hd1990 0.051707 0.036592 1.413 0.158856
## year_hd2000 0.008481 0.040492 0.209 0.834269
## year_hd2010 -0.110781 0.043595 -2.541 0.011640 *
## year_hd2020 -0.196117 0.045568 -4.304 2.39e-05 ***
## treated:desig_yet_hd -0.098483 0.036399 -2.706 0.007275 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1457 on 256 degrees of freedom
## (116 observations deleted due to missingness)
## Multiple R-squared: 0.8519, Adjusted R-squared: 0.8294
## F-statistic: 37.77 on 39 and 256 DF, p-value: < 2.2e-16
##
## [1] "__________________________________"
## [1] "D-in-D regression for the % of white residents, NOT weighted by HD population"
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## LABEL_hd + year_hd, data = comp_df[comp_df$group == "white",
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31063 -0.08385 -0.01052 0.07917 0.45470
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.096334 0.047168 2.042 0.042141 *
## treated 0.021254 0.026351 0.807 0.420664
## desig_yet_hd 0.028035 0.031956 0.877 0.381145
## LABEL_hdBlagden Alley/Naylor Court HD 0.106677 0.066583 1.602 0.110350
## LABEL_hdBloomingdale HD 0.121217 0.057446 2.110 0.035820 *
## LABEL_hdCapitol Hill HD 0.309114 0.054841 5.637 4.57e-08 ***
## LABEL_hdCleveland Park HD 0.741173 0.055011 13.473 < 2e-16 ***
## LABEL_hdDowntown HD 0.171784 0.067234 2.555 0.011197 *
## LABEL_hdDupont Circle HD 0.656760 0.055839 11.762 < 2e-16 ***
## LABEL_hdFinancial HD 0.597421 0.143552 4.162 4.32e-05 ***
## LABEL_hdFoggy Bottom HD 0.769557 0.069578 11.060 < 2e-16 ***
## LABEL_hdFoxhall HD 0.854456 0.067234 12.709 < 2e-16 ***
## LABEL_hdGeorgetown HD 0.773753 0.055001 14.068 < 2e-16 ***
## LABEL_hdGreater 14th St HD 0.324464 0.067503 4.807 2.62e-06 ***
## LABEL_hdGreater U St HD 0.148381 0.055509 2.673 0.007999 **
## LABEL_hdGWU / Old West End HD 0.778624 0.070883 10.985 < 2e-16 ***
## LABEL_hdKalorama Triangle HD 0.585505 0.055011 10.643 < 2e-16 ***
## LABEL_hdKingman Park HD 0.028819 0.058473 0.493 0.622528
## LABEL_hdLafayette Square HD 0.795959 0.073943 10.765 < 2e-16 ***
## LABEL_hdLeDroit Park HD 0.079301 0.066212 1.198 0.232149
## LABEL_hdLogan Circle HD 0.142204 0.069591 2.043 0.042035 *
## LABEL_hdMassachusetts Ave HD 0.770931 0.069531 11.088 < 2e-16 ***
## LABEL_hdMeridian Hill 0.230193 0.065241 3.528 0.000496 ***
## LABEL_hdMt. Pleasant HD 0.323962 0.055011 5.889 1.22e-08 ***
## LABEL_hdMt. Vernon Square HD 0.061670 0.056361 1.094 0.274891
## LABEL_hdPennsylvania Ave NHS 0.295259 0.061908 4.769 3.11e-06 ***
## LABEL_hdShaw HD 0.096201 0.055509 1.733 0.084287 .
## LABEL_hdSheridan-Kalorama HD 0.668434 0.055011 12.151 < 2e-16 ***
## LABEL_hdSixteenth St HD 0.141060 0.066212 2.130 0.034091 *
## LABEL_hdStrivers' Section HD 0.422460 0.070175 6.020 6.01e-09 ***
## LABEL_hdTakoma Park HD 0.201627 0.066241 3.044 0.002579 **
## LABEL_hdWashington Heights HD 0.566134 0.059416 9.528 < 2e-16 ***
## LABEL_hdWoodley Park HD 0.699163 0.066583 10.501 < 2e-16 ***
## year_hd1970 -0.086209 0.030150 -2.859 0.004596 **
## year_hd1980 -0.117891 0.031507 -3.742 0.000226 ***
## year_hd1990 -0.096791 0.034097 -2.839 0.004893 **
## year_hd2000 -0.156409 0.037731 -4.145 4.62e-05 ***
## year_hd2010 0.007534 0.040622 0.185 0.853017
## year_hd2020 0.010962 0.042460 0.258 0.796475
## treated:desig_yet_hd 0.105528 0.033917 3.111 0.002073 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1358 on 256 degrees of freedom
## (116 observations deleted due to missingness)
## Multiple R-squared: 0.8459, Adjusted R-squared: 0.8224
## F-statistic: 36.03 on 39 and 256 DF, p-value: < 2.2e-16
plot_ly(
data = comp_df[comp_df$group=="black",] %>% arrange(LABEL_hd, year_index),
x = ~year_index,
y = ~percent,
color = ~LABEL_hd, # Specify the grouping variable for color
linetype = ~as.factor(treatment_control),
type = "scatter",
mode = "lines+markers"
)
plot_ly(
data = comp_df[comp_df$group=="white",] %>% arrange(LABEL_hd, year_index),
x = ~year_index,
y = ~percent_std,
color = ~LABEL_hd, # Specify the grouping variable for color
linetype = ~as.factor(treatment_control),
type = "scatter",
mode = "lines+markers"
)
Now we’re going to repeat all of that, but using block groups instead of tracts. We will have to rely on sightly different versions of the Census data to do this, downloaded from NHGIS rather than Open Data DC. We’re also only going back to 1970, since that’s the data I could find relatively easily.
Load and clean block data:
# b70_shp <- sf::st_read("block_shapes/US_block_1970/US_block_1970.shp")
# b70_shp <- b70_shp[b70_shp$STATE70=="11",]
# sf::st_write(b70_shp, "block_shapes/DC_block_1970/DC_block_1970.shp")
b70_shp <- sf::st_read("block_shapes/DC_block_1970/DC_block_1970.shp")
## Reading layer `DC_block_1970' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\DC_block_1970\DC_block_1970.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 4665 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b80_shp <- sf::st_read("block_shapes/DC_block_1980/DC_block_1980.shp")
## Reading layer `DC_block_1980' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\DC_block_1980\DC_block_1980.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 4627 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b90_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2000_110_block_1990/DC_block_1990.shp")
## Reading layer `DC_block_1990' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2000_110_block_1990\DC_block_1990.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5140 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b00_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2000_110_block_2000/DC_block_2000.shp")
## Reading layer `DC_block_2000' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2000_110_block_2000\DC_block_2000.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5626 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b10_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2010_110_block_2010/DC_block_2010.shp")
## Reading layer `DC_block_2010' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2010_110_block_2010\DC_block_2010.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 6426 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610830 ymin: 308504.6 xmax: 1629412 ymax: 329361
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b20_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2020_110_block_2020/DC_block_2020.shp")
## Reading layer `DC_block_2020' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2020_110_block_2020\DC_block_2020.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5935 features and 18 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 1610830 ymin: 308504.6 xmax: 1629412 ymax: 329396
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b70_df <- readr::read_csv("block_data/nhgis0093_ds96_1970_block.csv")
b80_df <- readr::read_csv("block_data/nhgis_ds104_1980_block_11.csv")
b90_df <- readr::read_csv("block_data/nhgis0092_ds120_1990_block.csv")
b00_df <- readr::read_csv("block_data/nhgis0092_ds147_2000_block.csv")
b10_df <- readr::read_csv("block_data/nhgis0092_ds172_2010_block.csv")
b20_df <- readr::read_csv("block_data/nhgis0092_ds258_2020_block.csv")
clean_block_data <- function(shp, df, shp_b_id, df_b_id, var_prefix, df_n_black, df_n_white, drop_var="", year) {
shp <- shp %>% select(!!sym(shp_b_id)) %>% rename("geo_id" = !!sym(shp_b_id))
if (drop_var!="") {df <- df %>% select(-!!sym(drop_var))}
df <- df %>%
select(!!sym(df_b_id), starts_with(var_prefix)) %>%
rowwise() %>%
mutate(n_tot = sum(c_across(starts_with(var_prefix))))
df <- df %>%
rename("geo_id" = !!sym(df_b_id),
"n_black" = !!sym(df_n_black),
"n_white" = !!sym(df_n_white)) %>%
select(-starts_with(var_prefix)) %>%
mutate(n_other = n_tot - (n_black + n_white))
shp <- dplyr::left_join(shp, df, by="geo_id")
shp <- sf::st_transform(shp, 4326)
shp$geo_area_meters <- sf::st_area(shp)
shp$year <- year
return(shp)
}
# 1970 block data has to be specially cleaned, see https://forum.ipums.org/t/race-ethnicity-data-at-a-block-level-from-1970/6178
b70_df$c_black <- b70_df$CM6001 + b70_df$CM6002
b70_df$c_other <- b70_df$CM6003 + b70_df$CM6004
b70_df$c_white <- b70_df$CM5001 + b70_df$CM5002 - b70_df$c_black - b70_df$c_other
b70_shp <- clean_block_data(b70_shp, b70_df, "GISJOIN", "GISJOIN", "c_", "c_black", "c_white", year=1970)
b80_shp <- clean_block_data(b80_shp, b80_df, "GISJOIN", "GISJOIN", "C9D0", "C9D002", "C9D001", year=1980)
b90_shp <- clean_block_data(b90_shp, b90_df, "GISJOIN", "GISJOIN", "EUY0", "EUY002", "EUY001", year=1990)
b00_shp <- clean_block_data(b00_shp, b00_df, "GISJOIN", "GISJOIN", "FYE0", "FYE002", "FYE001", year=2000)
b10_shp <- clean_block_data(b10_shp, b10_df, "GISJOIN", "GISJOIN", "H7X", "H7X003", "H7X002", "H7X001", year=2010)
b20_shp <- clean_block_data(b20_shp, b20_df, "GISJOIN", "GISJOIN", "U7J", "U7J003", "U7J002", "U7J001", year=2020)
rm(b70_df, b80_df, b90_df, b00_df, b10_df, b20_df)
plot(sf::st_geometry(b70_shp["geo_id"]))
Get the blocks in the HDs:
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1890849 101.0 3863505 206.4 3863505 206.4
## Vcells 6995192 53.4 19205019 146.6 14490530 110.6
mp = 0.25 # TODO: run analysis varying this factor
hd_geos70 <- get_geos_in_hd(b70_shp, min_pct = mp, year = 1970)
hd_geos80 <- get_geos_in_hd(b80_shp, min_pct = mp, year = 1980)
hd_geos90 <- get_geos_in_hd(b90_shp, min_pct = mp, year = 1990)
hd_geos00 <- get_geos_in_hd(b00_shp, min_pct = mp, year = 2000)
hd_geos10 <- get_geos_in_hd(b10_shp, min_pct = mp, year = 2010)
hd_geos20 <- get_geos_in_hd(b20_shp, min_pct = mp, year = 2020)
Get the blocks neighboring the HDs:
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1932250 103.2 3863505 206.4 3863505 206.4
## Vcells 7248118 55.3 19205019 146.6 14490530 110.6
buff_dist = .005
threshold = .1
nearby_blocks70 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b70_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1970)
nearby_blocks80 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b80_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos80,threshold, 1980)
nearby_blocks90 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b90_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos90,threshold, 1990)
nearby_blocks00 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b00_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos00,threshold, 2000)
nearby_blocks10 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b10_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos10,threshold, 2010)
nearby_blocks20 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b20_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos20,threshold, 2020)
Again let’s peek at a map to make sure everything looks ok:
plot_geos(b70_shp, nearby_blocks70, hd_geos70$geos_in_hd$geo_id)
plot_geos(b20_shp, nearby_blocks20, hd_geos20$geos_in_hd$geo_id)
Now compare the demographics of the HD blocks and their neighbors in each year:
hd_comp_df <- dplyr::bind_rows(hd_geos70[[2]], hd_geos80[[2]], hd_geos90[[2]],
hd_geos00[[2]], hd_geos10[[2]], hd_geos20[[2]],)
near_comp_df <- dplyr::bind_rows(nearby_blocks70[[3]], nearby_blocks80[[3]], nearby_blocks90[[3]],
nearby_blocks00[[3]], nearby_blocks10[[3]], nearby_blocks20[[3]],)
comp_df <- run_regressions(hd_comp_df, near_comp_df, weights = T)
## [1] "__________________________________"
## [1] "D-in-D regression for the % of black residents, weighted by HD population"
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## LABEL_hd + year_hd, data = comp_df[comp_df$group == "black",
## ], weights = n_tot_hd)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -24.380 -4.624 -0.363 4.475 35.011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.14660 0.04097 27.987 < 2e-16 ***
## treated -0.08144 0.02256 -3.609 0.000357 ***
## desig_yet_hd -0.02135 0.03029 -0.705 0.481322
## LABEL_hdBlagden Alley/Naylor Court HD -0.38117 0.10894 -3.499 0.000534 ***
## LABEL_hdBloomingdale HD -0.14295 0.05187 -2.756 0.006194 **
## LABEL_hdCapitol Hill HD -0.39522 0.03997 -9.887 < 2e-16 ***
## LABEL_hdCleveland Park HD -0.83784 0.05071 -16.522 < 2e-16 ***
## LABEL_hdDowntown HD -0.53768 0.09027 -5.956 6.88e-09 ***
## LABEL_hdDupont Circle HD -0.76203 0.04292 -17.754 < 2e-16 ***
## LABEL_hdFinancial HD -0.51235 0.20734 -2.471 0.013998 *
## LABEL_hdFoggy Bottom HD -0.88040 0.08025 -10.971 < 2e-16 ***
## LABEL_hdFoxhall HD -0.90199 0.10099 -8.931 < 2e-16 ***
## LABEL_hdGreater 14th St HD -0.47498 0.04728 -10.046 < 2e-16 ***
## LABEL_hdGreater U St HD -0.29168 0.04681 -6.231 1.48e-09 ***
## LABEL_hdGWU / Old West End HD -0.82781 0.06462 -12.811 < 2e-16 ***
## LABEL_hdKalorama Triangle HD -0.72507 0.05502 -13.179 < 2e-16 ***
## LABEL_hdKingman Park HD -0.03279 0.06005 -0.546 0.585434
## LABEL_hdLafayette Square HD -0.88739 0.53325 -1.664 0.097082 .
## LABEL_hdLeDroit Park HD -0.09665 0.07367 -1.312 0.190523
## LABEL_hdLogan Circle HD -0.41625 0.07762 -5.363 1.58e-07 ***
## LABEL_hdMassachusetts Ave HD -0.83926 0.05942 -14.125 < 2e-16 ***
## LABEL_hdMeridian Hill -0.45556 0.05467 -8.334 2.43e-15 ***
## LABEL_hdMt. Pleasant HD -0.49051 0.04465 -10.985 < 2e-16 ***
## LABEL_hdMt. Vernon Square HD -0.23023 0.07137 -3.226 0.001388 **
## LABEL_hdPennsylvania Ave NHS -0.50995 0.09033 -5.646 3.67e-08 ***
## LABEL_hdShaw HD -0.37283 0.05110 -7.296 2.41e-12 ***
## LABEL_hdSheridan-Kalorama HD -0.85809 0.06002 -14.297 < 2e-16 ***
## LABEL_hdSixteenth St HD -0.51052 0.04910 -10.397 < 2e-16 ***
## LABEL_hdStrivers' Section HD -0.46383 0.05741 -8.080 1.39e-14 ***
## LABEL_hdTakoma Park HD -0.27996 0.07426 -3.770 0.000195 ***
## LABEL_hdWashington Heights HD -0.62519 0.05751 -10.871 < 2e-16 ***
## LABEL_hdWoodley Park HD -0.86442 0.06097 -14.179 < 2e-16 ***
## year_hd1980 -0.03296 0.02632 -1.252 0.211483
## year_hd1990 -0.09270 0.02961 -3.131 0.001906 **
## year_hd2000 -0.15212 0.03284 -4.632 5.31e-06 ***
## year_hd2010 -0.28279 0.03314 -8.533 6.07e-16 ***
## year_hd2020 -0.35060 0.03541 -9.901 < 2e-16 ***
## treated:desig_yet_hd -0.09416 0.02862 -3.290 0.001114 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.61 on 316 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8512, Adjusted R-squared: 0.8337
## F-statistic: 48.84 on 37 and 316 DF, p-value: < 2.2e-16
##
## [1] "__________________________________"
## [1] "D-in-D regression for the % of white residents, weighted by HD population"
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## LABEL_hd + year_hd, data = comp_df[comp_df$group == "white",
## ], weights = n_tot_hd)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -30.4541 -3.8231 -0.2287 4.1979 24.9094
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.090383 0.038978 -2.319 0.021043 *
## treated 0.077826 0.021468 3.625 0.000337 ***
## desig_yet_hd 0.002694 0.028814 0.093 0.925568
## LABEL_hdBlagden Alley/Naylor Court HD 0.247361 0.103644 2.387 0.017591 *
## LABEL_hdBloomingdale HD 0.111084 0.049352 2.251 0.025083 *
## LABEL_hdCapitol Hill HD 0.371868 0.038031 9.778 < 2e-16 ***
## LABEL_hdCleveland Park HD 0.772764 0.048245 16.017 < 2e-16 ***
## LABEL_hdDowntown HD 0.303130 0.085888 3.529 0.000478 ***
## LABEL_hdDupont Circle HD 0.667646 0.040836 16.350 < 2e-16 ***
## LABEL_hdFinancial HD 0.417388 0.197261 2.116 0.035134 *
## LABEL_hdFoggy Bottom HD 0.787806 0.076346 10.319 < 2e-16 ***
## LABEL_hdFoxhall HD 0.837506 0.096086 8.716 < 2e-16 ***
## LABEL_hdGreater 14th St HD 0.386991 0.044985 8.603 3.70e-16 ***
## LABEL_hdGreater U St HD 0.206565 0.044533 4.638 5.15e-06 ***
## LABEL_hdGWU / Old West End HD 0.718888 0.061477 11.694 < 2e-16 ***
## LABEL_hdKalorama Triangle HD 0.633754 0.052342 12.108 < 2e-16 ***
## LABEL_hdKingman Park HD 0.021839 0.057131 0.382 0.702521
## LABEL_hdLafayette Square HD 0.788443 0.507340 1.554 0.121168
## LABEL_hdLeDroit Park HD 0.067488 0.070094 0.963 0.336373
## LABEL_hdLogan Circle HD 0.341702 0.073845 4.627 5.41e-06 ***
## LABEL_hdMassachusetts Ave HD 0.749577 0.056530 13.260 < 2e-16 ***
## LABEL_hdMeridian Hill 0.310939 0.052009 5.979 6.08e-09 ***
## LABEL_hdMt. Pleasant HD 0.324943 0.042482 7.649 2.47e-13 ***
## LABEL_hdMt. Vernon Square HD 0.161960 0.067905 2.385 0.017664 *
## LABEL_hdPennsylvania Ave NHS 0.413593 0.085939 4.813 2.31e-06 ***
## LABEL_hdShaw HD 0.243955 0.048617 5.018 8.74e-07 ***
## LABEL_hdSheridan-Kalorama HD 0.780001 0.057103 13.660 < 2e-16 ***
## LABEL_hdSixteenth St HD 0.390276 0.046719 8.354 2.11e-15 ***
## LABEL_hdStrivers' Section HD 0.340975 0.054619 6.243 1.38e-09 ***
## LABEL_hdTakoma Park HD 0.212167 0.070655 3.003 0.002888 **
## LABEL_hdWashington Heights HD 0.514508 0.054714 9.404 < 2e-16 ***
## LABEL_hdWoodley Park HD 0.785536 0.058003 13.543 < 2e-16 ***
## year_hd1980 0.007038 0.025045 0.281 0.778872
## year_hd1990 0.041604 0.028170 1.477 0.140702
## year_hd2000 0.048793 0.031248 1.562 0.119406
## year_hd2010 0.184377 0.031532 5.847 1.25e-08 ***
## year_hd2020 0.172556 0.033690 5.122 5.27e-07 ***
## treated:desig_yet_hd 0.103555 0.027227 3.803 0.000171 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.192 on 316 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8233, Adjusted R-squared: 0.8026
## F-statistic: 39.79 on 37 and 316 DF, p-value: < 2.2e-16
comp_df <- run_regressions(hd_comp_df, near_comp_df, weights = F)
## [1] "__________________________________"
## [1] "D-in-D regression for the % of black residents, NOT weighted by HD population"
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## LABEL_hd + year_hd, data = comp_df[comp_df$group == "black",
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36570 -0.10361 -0.00381 0.08492 0.40910
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.127628 0.046566 24.216 < 2e-16 ***
## treated -0.081062 0.023017 -3.522 0.000491 ***
## desig_yet_hd -0.037863 0.031670 -1.196 0.232763
## LABEL_hdBlagden Alley/Naylor Court HD -0.418740 0.059032 -7.093 8.49e-12 ***
## LABEL_hdBloomingdale HD -0.176876 0.061167 -2.892 0.004095 **
## LABEL_hdCapitol Hill HD -0.437248 0.058304 -7.499 6.40e-13 ***
## LABEL_hdCleveland Park HD -0.891329 0.058487 -15.240 < 2e-16 ***
## LABEL_hdDowntown HD -0.599615 0.059931 -10.005 < 2e-16 ***
## LABEL_hdDupont Circle HD -0.801907 0.058304 -13.754 < 2e-16 ***
## LABEL_hdFinancial HD -0.686026 0.059849 -11.463 < 2e-16 ***
## LABEL_hdFoggy Bottom HD -0.895844 0.058487 -15.317 < 2e-16 ***
## LABEL_hdFoxhall HD -0.944705 0.059931 -15.763 < 2e-16 ***
## LABEL_hdGreater 14th St HD -0.509427 0.059032 -8.630 2.96e-16 ***
## LABEL_hdGreater U St HD -0.328614 0.059032 -5.567 5.51e-08 ***
## LABEL_hdGWU / Old West End HD -0.925058 0.061167 -15.124 < 2e-16 ***
## LABEL_hdKalorama Triangle HD -0.767472 0.058487 -13.122 < 2e-16 ***
## LABEL_hdKingman Park HD -0.063425 0.061167 -1.037 0.300559
## LABEL_hdLafayette Square HD -0.849017 0.061223 -13.868 < 2e-16 ***
## LABEL_hdLeDroit Park HD -0.140810 0.058304 -2.415 0.016292 *
## LABEL_hdLogan Circle HD -0.450672 0.058304 -7.730 1.42e-13 ***
## LABEL_hdMassachusetts Ave HD -0.867119 0.058304 -14.872 < 2e-16 ***
## LABEL_hdMeridian Hill -0.495563 0.061167 -8.102 1.16e-14 ***
## LABEL_hdMt. Pleasant HD -0.527034 0.058487 -9.011 < 2e-16 ***
## LABEL_hdMt. Vernon Square HD -0.258366 0.059032 -4.377 1.64e-05 ***
## LABEL_hdPennsylvania Ave NHS -0.502061 0.059032 -8.505 7.15e-16 ***
## LABEL_hdShaw HD -0.399129 0.059032 -6.761 6.53e-11 ***
## LABEL_hdSheridan-Kalorama HD -0.896236 0.058487 -15.324 < 2e-16 ***
## LABEL_hdSixteenth St HD -0.550494 0.058304 -9.442 < 2e-16 ***
## LABEL_hdStrivers' Section HD -0.516453 0.058487 -8.830 < 2e-16 ***
## LABEL_hdTakoma Park HD -0.318863 0.058487 -5.452 1.00e-07 ***
## LABEL_hdWashington Heights HD -0.672527 0.059931 -11.222 < 2e-16 ***
## LABEL_hdWoodley Park HD -0.894963 0.059032 -15.161 < 2e-16 ***
## year_hd1980 0.005156 0.027103 0.190 0.849244
## year_hd1990 -0.029076 0.029979 -0.970 0.332850
## year_hd2000 -0.089720 0.033808 -2.654 0.008356 **
## year_hd2010 -0.201509 0.035602 -5.660 3.37e-08 ***
## year_hd2020 -0.250573 0.038071 -6.582 1.91e-10 ***
## treated:desig_yet_hd -0.058482 0.030542 -1.915 0.056407 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1428 on 319 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8353, Adjusted R-squared: 0.8162
## F-statistic: 43.71 on 37 and 319 DF, p-value: < 2.2e-16
##
## [1] "__________________________________"
## [1] "D-in-D regression for the % of white residents, NOT weighted by HD population"
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## LABEL_hd + year_hd, data = comp_df[comp_df$group == "white",
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35371 -0.08122 -0.00300 0.09326 0.42613
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05391 0.04455 -1.210 0.22720
## treated 0.06904 0.02202 3.135 0.00188 **
## desig_yet_hd 0.02159 0.03030 0.712 0.47674
## LABEL_hdBlagden Alley/Naylor Court HD 0.27904 0.05648 4.941 1.26e-06 ***
## LABEL_hdBloomingdale HD 0.14035 0.05852 2.398 0.01705 *
## LABEL_hdCapitol Hill HD 0.40590 0.05578 7.276 2.68e-12 ***
## LABEL_hdCleveland Park HD 0.81521 0.05596 14.568 < 2e-16 ***
## LABEL_hdDowntown HD 0.31479 0.05734 5.490 8.22e-08 ***
## LABEL_hdDupont Circle HD 0.69654 0.05578 12.487 < 2e-16 ***
## LABEL_hdFinancial HD 0.59786 0.05726 10.441 < 2e-16 ***
## LABEL_hdFoggy Bottom HD 0.78872 0.05596 14.095 < 2e-16 ***
## LABEL_hdFoxhall HD 0.87223 0.05734 15.211 < 2e-16 ***
## LABEL_hdGreater 14th St HD 0.41183 0.05648 7.291 2.44e-12 ***
## LABEL_hdGreater U St HD 0.23244 0.05648 4.115 4.93e-05 ***
## LABEL_hdGWU / Old West End HD 0.81044 0.05852 13.848 < 2e-16 ***
## LABEL_hdKalorama Triangle HD 0.66525 0.05596 11.888 < 2e-16 ***
## LABEL_hdKingman Park HD 0.04983 0.05852 0.851 0.39515
## LABEL_hdLafayette Square HD 0.75342 0.05858 12.862 < 2e-16 ***
## LABEL_hdLeDroit Park HD 0.09812 0.05578 1.759 0.07953 .
## LABEL_hdLogan Circle HD 0.36616 0.05578 6.564 2.12e-10 ***
## LABEL_hdMassachusetts Ave HD 0.76664 0.05578 13.743 < 2e-16 ***
## LABEL_hdMeridian Hill 0.34118 0.05852 5.830 1.36e-08 ***
## LABEL_hdMt. Pleasant HD 0.35597 0.05596 6.361 6.94e-10 ***
## LABEL_hdMt. Vernon Square HD 0.18100 0.05648 3.205 0.00149 **
## LABEL_hdPennsylvania Ave NHS 0.39792 0.05648 7.045 1.15e-11 ***
## LABEL_hdShaw HD 0.26453 0.05648 4.684 4.18e-06 ***
## LABEL_hdSheridan-Kalorama HD 0.80853 0.05596 14.449 < 2e-16 ***
## LABEL_hdSixteenth St HD 0.41875 0.05578 7.507 6.11e-13 ***
## LABEL_hdStrivers' Section HD 0.38082 0.05596 6.805 5.00e-11 ***
## LABEL_hdTakoma Park HD 0.24564 0.05596 4.390 1.55e-05 ***
## LABEL_hdWashington Heights HD 0.55117 0.05734 9.612 < 2e-16 ***
## LABEL_hdWoodley Park HD 0.80725 0.05648 14.292 < 2e-16 ***
## year_hd1980 -0.03912 0.02593 -1.509 0.13237
## year_hd1990 -0.02383 0.02868 -0.831 0.40671
## year_hd2000 -0.01792 0.03235 -0.554 0.57995
## year_hd2010 0.09108 0.03406 2.674 0.00788 **
## year_hd2020 0.05635 0.03643 1.547 0.12287
## treated:desig_yet_hd 0.07479 0.02922 2.559 0.01095 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1366 on 319 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8179, Adjusted R-squared: 0.7967
## F-statistic: 38.71 on 37 and 319 DF, p-value: < 2.2e-16